--- title: Caseload Modelling Introduction keywords: fastai sidebar: home_sidebar nb_path: "20. Caseload Modelling Introduction.ipynb" ---
{% raw %}
{% endraw %}
  • Cases classified as 'Ignored Cases' have been removed in this analysis.
  • Adjudicated cases have not been removed in order to include this feature in some analysis.
{% raw %}
import pandas as pd
import numpy  as np
from datetime import datetime
import altair as alt

from vega_datasets import data
_=alt.data_transformers.disable_max_rows()

#year_portion_limit = 0.8

debug = 1

CESSATION_CONTINUATION = {
    'Driver Deceased': 'Deceased', 
    'Drivers found medically unfit to drive':'Cessation',
    'Drivers that did not respond; cancelled license':'Cessation',
    'Drivers that voluntarily surrendered their license':'Cessation',
    'Drivers ultimately found fit to drive':'Continuation',
    'Cases remaining open at time of reporting':'Continuation'
}


data_file_path = r'C:\Users\mbeaulieu\rsi_project_book\Data\\'
generated_file_path = r'C:\Users\mbeaulieu\rsi_project_book\Data\Generated\\'
{% endraw %} {% raw %}
# caseload_data_by_month['Opened Month'] = caseload_data_by_month.apply(lambda x: x['CASE_OPENED_DT'].strftime('%b') + '-' + x['CASE_OPENED_DT'].strftime('%Y'), axis=1)
#hideinput
def process_file(file_name):
    #'BIRTHDATE, LAST_STATUS_DATE, PREV_CASE_END_DT'
    data = pd.read_csv(file_name, parse_dates=['CASE_OPENED_DT'], dtype={'DRIVERS_LICENSE_NO': str})
    return data
    
data2020_fpath = data_file_path + 'generated\Sep_2020_toNov2021_ADJUDICATEDCASELOAD.csv'    
Sept2020_exploded_df = process_file(data2020_fpath)    
{% endraw %} {% raw %}
#caseload_data_by_month

# %run ./helpers.py
# r = derive_statuscount_percase(cases2018_df, ftedays_df)

# monthly_summary_statuscounts = r[0]
{% endraw %}

Caseload definition

scenario 1: If a case openes in Jan 2020, and is closed June 2020 then it will show up as a caseload case for Jan, Feb, March, April and May.

As of June 2020 it will be removed from caseload counts.

{% raw %}
aggregation = {
#    'Case Count': ('DRIVERS_LICENSE_NO','nunique'),
    'Open Count': ('Open Count','sum'),
    'Closed Count': ('Closed Count','sum'),
    'Status Count': ('STATUS_COUNT','sum'),

    }

# Sept2020_monthly_counts = Sept2020_exploded_df.groupby([pd.Grouper(freq='M', key='CASE_OPENED_DT') , 
#                               pd.Grouper(key='Year Span'),
#                               pd.Grouper(key='CASE_DSC'),                                   
#                               pd.Grouper(key='Status'),
#                               pd.Grouper(key='DRIVERS_LICENSE_NO'),
#                              ]).agg(** aggregation)

# Sept2020_monthly_counts = pd.DataFrame(Sept2020_monthly_counts).reset_index()


Adjudicated_Sept2020_exploded_df = Sept2020_exploded_df[Sept2020_exploded_df['Is Adjudicated'] == 'Adjudicated']
Adjudicated_Sept2020_monthly_counts = Adjudicated_Sept2020_exploded_df.groupby([pd.Grouper(freq='M', key='CASE_OPENED_DT') , 
                              pd.Grouper(key='Year Span'),
                              pd.Grouper(key='CASE_DSC'),                                   
                              pd.Grouper(key='Status'),
                              pd.Grouper(key='DRIVERS_LICENSE_NO'),
                             ]).agg(** aggregation)

Adjudicated_Sept2020_monthly_counts = pd.DataFrame(Adjudicated_Sept2020_monthly_counts).reset_index()
{% endraw %} {% raw %}
# aggregation = {
#     'Case Count': ('DRIVERS_LICENSE_NO','nunique'),
#     'Open Count': ('Open Count','sum'),
#     'Status Count': ('STATUS_COUNT','sum'),

#     }

# Adjudicated_Sept2020_exploded_df = Sept2020_exploded_df[Sept2020_exploded_df['Is Adjudicated'] == 'Adjudicated']
# Adjudicated_Sept2020_monthly_counts = Adjudicated_Sept2020_exploded_df.groupby([ pd.Grouper(key='Year Span'),
#                               pd.Grouper(key='Status'),
#                               pd.Grouper(key='CASE_DSC'),                                   
#                              ]).agg(** aggregation)

# Adjudicated_Sept2020_monthly_counts = Adjudicated_Sept2020_monthly_counts.reset_index()
# Adjudicated_Sept2020_monthly_counts
{% endraw %} {% raw %}
montly_backlog_all = alt.Chart(Adjudicated_Sept2020_monthly_counts).mark_area(point=True).encode(
    y = alt.Y("sum(Open Count):Q" ),
    x = alt.X("Year Span:T",  scale=alt.Scale(zero=False) ),
   color = "CASE_DSC:N",
   tooltip=['sum(Open Count)', 'sum(Status Count)'],
    #column='Status'
).properties(
    width=600,
    height=200
)

montly_backlog_all
{% endraw %} {% raw %}
montly_backlog_all = alt.Chart(Adjudicated_Sept2020_monthly_counts).mark_area(point=True).encode(
    y = alt.Y("sum(Closed Count):Q" ),
    x = alt.X("Year Span:T",  scale=alt.Scale(zero=False) ),
   color = "CASE_DSC:N",
   tooltip=['sum(Closed Count)', 'sum(Status Count)'],
    #column='Status'
).properties(
    width=600,
    height=200
)

montly_backlog_all
{% endraw %}

Fig 1. Adjudicated Cases opened in Sept 2020. This shows the monthly Open Case total for these cases that are still open over the following months.

{% raw %}
#Adjudicated_Sept2020_monthly_counts

montly_backlog_all = alt.Chart(Adjudicated_Sept2020_monthly_counts).transform_filter(
    "datum['Status'] == 'Open'"
).mark_area(point=True).encode(
    y = alt.Y("sum(Status Count):Q" ),
    x = alt.X("Year Span:T",  scale=alt.Scale(zero=False) ),
   color = "CASE_DSC:N",
   tooltip=['sum(Status Count)', 'sum(Open Count)'],
    #column='Status'
).properties(
    width=600,
    height=200
)

montly_backlog_all
{% endraw %}

Figure 2. Adjudicated Cases opened in Sept 2020. This shows the monthly total of case status changes over the lifespan of these Cases

Scratch Pad and Tests

{% raw %}
open_count1 = Adjudicated_Sept2020_monthly_counts[Adjudicated_Sept2020_monthly_counts['Year Span'] == '2020-09-30']['Open Count'].sum()

open_count2 = Adjudicated_Sept2020_exploded_df[Adjudicated_Sept2020_exploded_df['Year Span'] == '2020-09-30']['Open Count'].sum()

assert open_count1 == open_count2, 'Data Set Counts of open cases are NOT consistent'
print('Data Set Counts of open cases ARE consistent')
Data Set Counts of open cases ARE consistent
{% endraw %} {% raw %}
closed_count1 = Adjudicated_Sept2020_monthly_counts[Adjudicated_Sept2020_monthly_counts['Year Span'] == '2020-09-30']['Open Count'].sum()

closed_count2 = Adjudicated_Sept2020_exploded_df[Adjudicated_Sept2020_exploded_df['Year Span'] == '2020-09-30']['Open Count'].sum()

assert closed_count1 == closed_count2, 'Data Set Counts of closed cases are NOT consistent'
print( 'Data Set Counts of closed cases ARE consistent')
Data Set Counts of closed cases ARE consistent
{% endraw %}